import jieba

# Hard-coded dataset directory; adjust for your environment.
path = 'C:/pycharmproject/project1/Seq2/data/'
en_path = path + 'train.en'

# Read the source file line by line and write a whitespace-joined,
# jieba-tokenized copy alongside it.
# NOTE(review): the output name 'test.en' will overwrite any existing
# test split in the same directory — confirm this is intended.
with open(en_path, 'r', encoding='utf-8') as f, \
        open(path + 'test.en', 'w', encoding='utf-8') as en:
    # Iterate the file object directly instead of readlines():
    # streams the corpus without loading it all into memory.
    for i, text in enumerate(f):
        # Strip once: the original tokenized the raw line, so the
        # trailing '\n' survived as a jieba token and leaked into the
        # joined output, producing stray blank lines in the file.
        stripped = text.strip()
        print(f"Processing line {i+1}: {stripped}")
        if stripped:  # skip tokenization for empty lines
            try:
                fenci = jieba.lcut(stripped)
                sen = ' '.join(fenci)
                en.write(sen + '\n')
                print(f"Tokenized: {sen}")
            except Exception as e:
                # Best-effort: report and continue on lines jieba
                # cannot handle (note: the failed line is NOT written,
                # so line alignment with companion files may drift —
                # same behavior as the original).
                print(f"Error tokenizing line {i+1}: {e}")
        else:
            # Preserve empty lines so line counts stay aligned with
            # any parallel target-language file.
            en.write('\n')

print("Processing complete.")



